---
title: "Design for ``Refugee narratives and public opinion\n during the COVID-19 pandemic''"
author: ""
date: "this ver: Thursday April 30, 2020"
output:
  pdf_document: default
  html_document: default
---

We're going to use *DeclareDesign*.

```{r setup, eval=FALSE}
knitr::opts_chunk$set(echo = TRUE)
#install.packages(c("DeclareDesign", "fabricatr", "randomizr", "estimatr", "DesignLibrary"))
library(DeclareDesign)
library(DesignLibrary)
library(tidyverse)
library(kableExtra)
library(xtable)
```

For hypotheses please refer to main text.

## Experiment 1: 5 arms

* T1 = covid - US
* T2 = covid - PA
* T3 = covid - Lancaster
* T4 = covid - No location
* T5 = no covid - US

We want to learn whether there is differential support for refugee ads on Facebook. Respondents
are randomly assigned to receive ads with refugees with information on the above five arms. Assignment to each of the five arms is with equal probabilities, and other then mention of covid and location, ads otherwise identical. We define our outcome of interest as the difference in click rates between experimental conditions.

In settings of multiple treatment arms, we could do a number of pairwise comparisons: across treatments and each treatment against control.

#### Design Declaration A

* Model: 

We specify a population of size $N$ where a unit $i$ has a potential outcome, $Y_i(Z=0)$, when it remains untreated and $M\, (m=1,2,\cdots,M)$ potential outcomes defined according to the treatment that it receives. The effect of each treatment on the outcome of unit $i$ is equal to the difference in the potential outcome under treatment condition m and the control condition: $Y_i(Z=m)-Y_i(Z=0)$.

* Inquiry:

We are interested in all of the pairwise comparisons between arms: $E[Y(m)-Y(m')]$, for all $m\in\{1,\cdots,M\}$.

* Data strategy:

We randomly assign $k/N$ units to each of the treatment arms.

* Answer strategy:

Take every pairwise difference in means corresponding to the specific estimand.
```{r, eval=FALSE}
set.seed(123)
N <- 450000 #450K
covid_effect<-0.1
us_effect<-0.05
pa_effect<-0.075
lancaster_effect<-0.08
outcome_means <- c(covid_effect+us_effect #covid-us
                   ,covid_effect+pa_effect #covid-pa
                   ,covid_effect+lancaster_effect #covid-lancaster
                   ,covid_effect #covid-nolocation
                   ,us_effect #nocovid-us
                   )
sd_i <- 0.2
outcome_sds <- c(0, 0, 0, 0, 0)

# Population
population <- declare_population(N = N, u_1 = rnorm(N, 0, outcome_sds[1L]), 
    u_2 = rnorm(N, 0, outcome_sds[2L]), u_3 = rnorm(N, 0, outcome_sds[3L]), 
    u_4 = rnorm(N, 0, outcome_sds[4L]), u_5 = rnorm(N, 0, outcome_sds[5L]),
    u = rnorm(N) * sd_i)
# Potential outcomes
potential_outcomes <- declare_potential_outcomes(formula = Y ~ (outcome_means[1] + 
    u_1) * (Z == "1") + (outcome_means[2] + u_2) * (Z == "2") + 
    (outcome_means[3] + u_3) * (Z == "3") + (outcome_means[4] + 
    u_4) * (Z == "4") + + (outcome_means[5] + 
    u_5) * (Z == "5") + u ,  conditions = c("1", "2", "3", "4", "5"), 
    assignment_variables = Z)
# Estimands
estimand <- declare_estimands(ate_Y_2_1 = mean(Y_Z_2 - Y_Z_1), ate_Y_3_1 = mean(Y_Z_3 - 
    Y_Z_1), ate_Y_4_1 = mean(Y_Z_4 - Y_Z_1), ate_Y_5_1 = mean(Y_Z_5 - Y_Z_1),
    ate_Y_3_2 = mean(Y_Z_3 - Y_Z_2), ate_Y_4_2 = mean(Y_Z_4 - Y_Z_2), 
    ate_Y_5_2 = mean(Y_Z_5 - Y_Z_2), ate_Y_4_3 = mean(Y_Z_4 - 
    Y_Z_3), ate_Y_5_3 = mean(Y_Z_5 - Y_Z_3), ate_Y_5_4 = mean(Y_Z_5 - Y_Z_4))
# Assignment
assignment <- declare_assignment(num_arms = 5, conditions = c("1", "2", "3", 
"4","5"), assignment_variable = Z)
reveal_Y <- declare_reveal(assignment_variables = Z)
# Estimator
estimator <- declare_estimator(handler = function(data) {
    estimates <- rbind.data.frame(
      ate_Y_2_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "2"), 
      ate_Y_3_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "3"), 
      ate_Y_4_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "4"), 
      ate_Y_5_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "5"), 
      ate_Y_3_2 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "2", condition2 = "3"), 
      ate_Y_4_2 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "2", condition2 = "4"), 
      ate_Y_5_2 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "2", condition2 = "5"),
      ate_Y_4_3 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "3", condition2 = "4"),
      ate_Y_5_3 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "3", condition2 = "5"),
      ate_Y_5_4 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "4", condition2 = "5")
      )
    names(estimates)[names(estimates) == "N"] <- "N_DIM"
    estimates$estimator_label <- c("DIM (Z_2 - Z_1)", "DIM (Z_3 - Z_1)", 
    "DIM (Z_4 - Z_1)", "DIM (Z_5 - Z_1)","DIM (Z_3 - Z_2)", "DIM (Z_4 - Z_2)", "DIM (Z_5 - Z_2)",
    "DIM (Z_4 - Z_3)", "DIM (Z_5 - Z_3)", "DIM (Z_5 - Z_4)")
    estimates$estimand_label <- rownames(estimates)
    estimates$estimate <- estimates$coefficients
    estimates$term <- NULL
    return(estimates)
})
multi_arm_design <- population + potential_outcomes + assignment + 
    reveal_Y + estimand + estimator

# Diagnose Experiment 1 ad click rate:
t<-Sys.time()
diagnosis <- diagnose_design(multi_arm_design,diagnosands=)
Sys.time()-t
saveRDS(diagnosis,file="diagnosis-1.rds")

dat1<-diagnosis$diagnosands_df[,c("estimand_label","estimator_label","bias","rmse","power","coverage","mean_estimate","sd_estimate","mean_se","type_s_rate","mean_estimand","n_sims")]
dat2<-diagnosis$diagnosands_df[,c("estimand_label","estimator_label","se(bias)","se(rmse)","se(power)","se(coverage)","se(mean_estimate)","se(sd_estimate)","se(mean_se)","se(type_s_rate)","se(mean_estimand)","n_sims")]
dat2$estimand_label<-NA
dat2$estimator_label<-NA
tmp_n<-nrow(dat1)+nrow(dat2)
dat<-data.frame(Estimand=rep(NA,tmp_n),Estimator=rep(NA,tmp_n)
                ,Bias=rep(NA,tmp_n),RMSE=rep(NA,tmp_n)
                ,Power=rep(NA,tmp_n),Coverage=rep(NA,tmp_n)
                ,Mean_Estimate=rep(NA,tmp_n),SD_Estimate=rep(NA,tmp_n)
                ,Mean_SE=rep(NA,tmp_n) ,Type_S_Rate=rep(NA,tmp_n)
                ,Mean_Estimand=rep(NA,tmp_n),N_Sims=rep(NA,tmp_n))

j1<-j2<-1
for(i in 1:tmp_n){
  if(i%%2==0){
  dat[i,]<-dat2[j2,]
  j2<-j2+1
  }else{
  dat[i,]<-dat1[j1,]
  j1<-j1+1
  }
}
print(xtable(dat[,1:(ncol(dat)-1)],digits=2), include.rownames=FALSE)
```

Outcome of refugee thermometer, after clicking on ad:

Some respondents will not have thermometer ratings because of not clicking on the ads to be routed to the surveys; we can consider this as a type of attrition/missing data. This can affect both power and bias.

As such, we set up a design that accounts for attrition.

#### Design Declaration B

* Model: 

We specify a model with a population $N$ that has three variables affected by treatment: response variable $R_i$, outcome (here refugee thermometer rating in the survey) $Y_i$, which is correlated with response variable through parameter $\rho$. $Y_i^{obs}$ is the measured version of $Y_i$, which is only observed when $R_i=1$. For our setting, when a respondent is willing to click on the ad and answer the survey $R_i=1$.

* Inquiry:

Here we're interested in knowing the average of all respondents' differences in treatment arm potential outcomes, all of the pairwise comparisons between arms: $E[Y(m)-Y(m')]$, for all $m\in\{1,\cdots,M\}$. But we're also interested in the average treatment effect on reporting $E[R_i(m)-R_i(m')]$ as well as the pairwise comparison between treatment arms among those who report: $E[Y_i(m)-Y_i(m')|R_i=1]$.

* Data strategy:

We randomly assign $N/k = 90,000$ units to each of the treatment arms.

* Answer strategy:

$R_i$ and $Y_i^{obs}$, take every pairwise difference in means corresponding to the specific estimand.

Experiment 1: 5 arms

* T1 = covid - US
* T2 = covid - PA
* T3 = covid - Lancaster
* T4 = covid - No location
* T5 = no covid - US

```{r, eval=FALSE}
#Starting parameters
N <- 450000
a_R <- 0
#Likelihood of responding to survey after exposed to treatment arm: let covid effect on going to survey be 0.3
b1_R <- 0.5 #covid - US
b2_R <- 0.6 #covid - PA
b3_R <- 0.7 #covid - Lancaster
b4_R <- 0.4 #covid - No location (-0.1 from US)
b5_R <- 0.2 #no covid - US
a_Y <- 0
#Effect on thermometer rating after exposed to treatment arm:
b1_Y <- 0.5   #covid - US
b2_Y <- 0.6   #covid - PA
b3_Y <- 0.7   #covid - Lancaster
b4_Y <- 0.4   #covid - No location (-0.1 from US)
b5_Y <- 0.2   #no covid - US
#correl
rho <- c(0.0,0.2,0.8)

#set up
t<-Sys.time()
for(i in 1:3){
  cat("Start Design:",i,"\n")
#Population
population <- declare_population(N = N, u = rnorm(N), v=rnorm(N)
              ,u1_R = rnorm(N),u2_R = rnorm(N),u3_R = rnorm(N),u4_R = rnorm(N),u5_R = rnorm(N)
              ,u1_Y = rnorm(N,mean = rho[i] * u1_R, sd = sqrt(1 - rho[i]^2)),u2_Y = rnorm(N,mean = rho[i] * u2_R, sd = sqrt(1 - rho[i]^2))
              ,u3_Y = rnorm(N,mean = rho[i] * u3_R, sd = sqrt(1 - rho[i]^2)),u4_Y = rnorm(N,mean = rho[i] * u4_R, sd = sqrt(1 - rho[i]^2))
              ,u5_Y = rnorm(N,mean = rho[i] * u5_R, sd = sqrt(1 - rho[i]^2))
              ) #one error eqn Y; one error eqn R;  errors for each condition in R; errors for each condition in Y
#Potential outcomes 
  #R
potential_outcomes_R <- declare_potential_outcomes(
  R ~ (a_R + b1_R + u1_R)* (Z == "1") + (a_R + b2_R + u2_R)* (Z == "2")  
  + (a_R + b3_R + u3_R)* (Z == "3") + (a_R + b4_R + u4_R)* (Z == "4")
  + (a_R + b5_R + u5_R)* (Z == "5") > v, conditions = c("1", "2", "3", "4", "5"), assignment_variables = Z)
  #Y
potential_outcomes_Y <- declare_potential_outcomes(
  Y ~ (a_Y + b1_Y + u1_Y)* (Z == "1") + (a_Y + b2_Y + u2_Y)* (Z == "2")
  + (a_Y + b3_Y + u3_Y)* (Z == "3") + (a_Y + b4_Y + u4_Y)* (Z == "4")
  + (a_Y + b5_Y + u5_Y)* (Z == "5") + u, conditions = c("1", "2", "3", "4", "5"), assignment_variables = Z) #>u
#Estimands: 3 types -- ATE on R, ATE on Y, ATE on Y|R
estimand <- declare_estimands(
  #ATE on R
  ate_R_2_1 = mean(R_Z_2 - R_Z_1), ate_R_3_1 = mean(R_Z_3 - R_Z_1), ate_R_4_1 = mean(R_Z_4 - R_Z_1), ate_R_5_1 = mean(R_Z_5 - R_Z_1),
  ate_R_3_2 = mean(R_Z_3 - R_Z_2), ate_R_4_2 = mean(R_Z_4 - R_Z_2), ate_R_5_2 = mean(R_Z_5 - R_Z_2),
  ate_R_4_3 = mean(R_Z_4 - R_Z_3), ate_R_5_3 = mean(R_Z_5 - R_Z_3), ate_R_5_4 = mean(R_Z_5 - R_Z_4)
  #ATE on Y
  ,ate_Y_2_1 = mean(Y_Z_2 - Y_Z_1), ate_Y_3_1 = mean(Y_Z_3 - Y_Z_1), ate_Y_4_1 = mean(Y_Z_4 - Y_Z_1), ate_Y_5_1 = mean(Y_Z_5 - Y_Z_1),
  ate_Y_3_2 = mean(Y_Z_3 - Y_Z_2), ate_Y_4_2 = mean(Y_Z_4 - Y_Z_2), ate_Y_5_2 = mean(Y_Z_5 - Y_Z_2),
  ate_Y_4_3 = mean(Y_Z_4 - Y_Z_3), ate_Y_5_3 = mean(Y_Z_5 - Y_Z_3), ate_Y_5_4 = mean(Y_Z_5 - Y_Z_4)
  #ATE on Y|R
  ,ate_YR_2_1 = mean((Y_Z_2 - Y_Z_1)[R == 1]), ate_YR_3_1 = mean((Y_Z_3 - Y_Z_1)[R == 1])
  ,ate_YR_4_1 = mean((Y_Z_4 - Y_Z_1)[R == 1]), ate_YR_5_1 = mean((Y_Z_5 - Y_Z_1)[R == 1])
  ,ate_YR_3_2 = mean((Y_Z_3 - Y_Z_2)[R == 1]), ate_YR_4_2 = mean((Y_Z_4 - Y_Z_2)[R == 1])
  ,ate_YR_5_2 = mean((Y_Z_5 - Y_Z_2)[R == 1]), ate_YR_4_3 = mean((Y_Z_4 - Y_Z_3)[R == 1])
  ,ate_YR_5_3 = mean((Y_Z_5 - Y_Z_3)[R == 1]), ate_YR_5_4 = mean((Y_Z_5 - Y_Z_4)[R == 1])
  )
#Assignment
assignment <- declare_assignment(num_arms = 5, conditions = c("1", "2", "3", "4", "5"), assignment_variable = Z)
#Reveal/Observed: ??
reveal <- declare_reveal(outcome_variables = c("R", "Y"), assignment_variables = Z)
observed <- declare_step(Y_obs = ifelse(R, Y, NA), handler = fabricate)

#Estimator
estimator <- declare_estimator(handler = function(data) {
    estimates <- rbind.data.frame(
      #ATE on R
      ate_R_2_1 = difference_in_means(formula = R ~ Z, data = data, condition1 = "1", condition2 = "2"), 
      ate_R_3_1 = difference_in_means(formula = R ~ Z, data = data, condition1 = "1", condition2 = "3"), 
      ate_R_4_1 = difference_in_means(formula = R ~ Z, data = data, condition1 = "1", condition2 = "4"), 
      ate_R_5_1 = difference_in_means(formula = R ~ Z, data = data, condition1 = "1", condition2 = "5"), 
      ate_R_3_2 = difference_in_means(formula = R ~ Z, data = data, condition1 = "2", condition2 = "3"), 
      ate_R_4_2 = difference_in_means(formula = R ~ Z, data = data, condition1 = "2", condition2 = "4"), 
      ate_R_5_2 = difference_in_means(formula = R ~ Z, data = data, condition1 = "2", condition2 = "5"),
      ate_R_4_3 = difference_in_means(formula = R ~ Z, data = data, condition1 = "3", condition2 = "4"),
      ate_R_5_3 = difference_in_means(formula = R ~ Z, data = data, condition1 = "3", condition2 = "5"),
      ate_R_5_4 = difference_in_means(formula = R ~ Z, data = data, condition1 = "4", condition2 = "5"),
      # ATE on Y conditional on R
      ate_YR_2_1 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "1", condition2 = "2"), 
      ate_YR_3_1 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "1", condition2 = "3"), 
      ate_YR_4_1 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "1", condition2 = "4"), 
      ate_YR_5_1 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "1", condition2 = "5"), 
      ate_YR_3_2 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "2", condition2 = "3"), 
      ate_YR_4_2 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "2", condition2 = "4"), 
      ate_YR_5_2 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "2", condition2 = "5"),
      ate_YR_4_3 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "3", condition2 = "4"),
      ate_YR_5_3 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "3", condition2 = "5"),
      ate_YR_5_4 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "4", condition2 = "5"),
      #ATE on Y
      ate_Y_2_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "2"), 
      ate_Y_3_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "3"), 
      ate_Y_4_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "4"), 
      ate_Y_5_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "5"), 
      ate_Y_3_2 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "2", condition2 = "3"), 
      ate_Y_4_2 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "2", condition2 = "4"), 
      ate_Y_5_2 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "2", condition2 = "5"),
      ate_Y_4_3 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "3", condition2 = "4"),
      ate_Y_5_3 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "3", condition2 = "5"),
      ate_Y_5_4 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "4", condition2 = "5")
      )
    names(estimates)[names(estimates) == "N"] <- "N_DIM"
    estimates$estimator_label <- c(
      #R
      "DIM_R (Z_2 - Z_1)", "DIM_R (Z_3 - Z_1)", "DIM_R (Z_4 - Z_1)", "DIM_R (Z_5 - Z_1)","DIM_R (Z_3 - Z_2)"
      , "DIM_R (Z_4 - Z_2)", "DIM_R (Z_5 - Z_2)", "DIM_R (Z_4 - Z_3)", "DIM_R (Z_5 - Z_3)", "DIM_R (Z_5 - Z_4)"
      #Y|R
      , "DIM_Y_obs (Z_2 - Z_1)", "DIM_Y_obs (Z_3 - Z_1)", "DIM_Y_obs (Z_4 - Z_1)", "DIM_Y_obs (Z_5 - Z_1)","DIM_Y_obs (Z_3 - Z_2)"
      , "DIM_Y_obs (Z_4 - Z_2)", "DIM_Y_obs (Z_5 - Z_2)", "DIM_Y_obs (Z_4 - Z_3)", "DIM_Y_obs (Z_5 - Z_3)", "DIM_Y_obs (Z_5 - Z_4)"
      #Y
      ,"DIM_Y (Z_2 - Z_1)", "DIM_Y (Z_3 - Z_1)", "DIM_Y (Z_4 - Z_1)", "DIM_Y (Z_5 - Z_1)","DIM_Y (Z_3 - Z_2)"
      , "DIM_Y (Z_4 - Z_2)", "DIM_Y (Z_5 - Z_2)", "DIM_Y (Z_4 - Z_3)", "DIM_Y (Z_5 - Z_3)", "DIM_Y (Z_5 - Z_4)"
      )
    estimates$estimand_label <- rownames(estimates)
    estimates$estimate <- estimates$coefficients
    estimates$term <- NULL
    return(estimates)
})
multi_arm_attrition_design <- population + potential_outcomes_R + 
    potential_outcomes_Y + assignment + reveal + observed + 
    estimand + estimator
diagnoses <- diagnose_design(multi_arm_attrition_design)
saveRDS(diagnoses,paste("multi_arm_attrition_design-rho",i,".rds",sep=""))
cat("Finished Design:",i," in ", Sys.time()-t,"\n")
}
Sys.time()-t


# Combine and print xtable
rho1<-readRDS("multi_arm_attrition_design-rho1.rds")
rho2<-readRDS("multi_arm_attrition_design-rho2.rds")
rho3<-readRDS("multi_arm_attrition_design-rho3.rds")
dat1<-rho1$diagnosands_df
dat1$design_label<-"rho=0.0"
dat2<-rho2$diagnosands_df
dat2$design_label<-"rho=0.2"
dat3<-rho3$diagnosands_df
dat3$design_label<-"rho=0.8"

dat<-rbind(dat1,dat2,dat3)

dat1<-dat[,c("design_label","estimand_label","estimator_label","bias","rmse","power","coverage","mean_estimate","sd_estimate","mean_se","type_s_rate","mean_estimand","n_sims")]
dat2<-dat[,c("design_label","estimand_label","estimator_label","se(bias)","se(rmse)","se(power)","se(coverage)","se(mean_estimate)","se(sd_estimate)","se(mean_se)","se(type_s_rate)","se(mean_estimand)","n_sims")]


dat2$estimand_label<-NA
dat2$estimator_label<-NA

tmp_n<-nrow(dat1)+nrow(dat2)
d<-data.frame(Design=rep(NA,tmp_n),Estimand=rep(NA,tmp_n),Estimator=rep(NA,tmp_n)
                ,Bias=rep(NA,tmp_n),RMSE=rep(NA,tmp_n)
                ,Power=rep(NA,tmp_n),Coverage=rep(NA,tmp_n)
                ,Mean_Estimate=rep(NA,tmp_n),SD_Estimate=rep(NA,tmp_n)
                ,Mean_SE=rep(NA,tmp_n) ,Type_S_Rate=rep(NA,tmp_n)
                ,Mean_Estimand=rep(NA,tmp_n),N_Sims=rep(NA,tmp_n))

j1<-j2<-1
for(i in 1:tmp_n){
  if(i%%2==0){
  d[i,]<-dat2[j2,]
  j2<-j2+1
  }else{
  d[i,]<-dat1[j1,]
  j1<-j1+1
  }
}
print(xtable(d[1:60,2:(ncol(d)-1)],digits=2), include.rownames=FALSE) #rho=0.0

print(xtable(d[61:120,2:(ncol(d)-1)],digits=2), include.rownames=FALSE) #rho=0.2

print(xtable(d[121:nrow(d),2:(ncol(d)-1)],digits=2), include.rownames=FALSE) #rho=0.8

```


#### Post-experiment checks

Given that we had to estimate the likely number of profiles on FB who would be exposed to our treatment arms, we conduct a verification of our power calculations using the actual N sizes FB ended up reaching (roughly 25K per arm):

```{r, eval=FALSE}
set.seed(123)
N <- 125000 #Each arm 25K, 5 arms=125000
covid_effect<-0.1
us_effect<-0.05
pa_effect<-0.075
lancaster_effect<-0.08
outcome_means <- c(covid_effect+us_effect #covid-us
                   ,covid_effect+pa_effect #covid-pa
                   ,covid_effect+lancaster_effect #covid-lancaster
                   ,covid_effect #covid-nolocation
                   ,us_effect #nocovid-us
                   )
sd_i <- 0.2
outcome_sds <- c(0, 0, 0, 0, 0)

# Population
population <- declare_population(N = N, u_1 = rnorm(N, 0, outcome_sds[1L]), 
    u_2 = rnorm(N, 0, outcome_sds[2L]), u_3 = rnorm(N, 0, outcome_sds[3L]), 
    u_4 = rnorm(N, 0, outcome_sds[4L]), u_5 = rnorm(N, 0, outcome_sds[5L]),
    u = rnorm(N) * sd_i)
# Potential outcomes
potential_outcomes <- declare_potential_outcomes(formula = Y ~ (outcome_means[1] + 
    u_1) * (Z == "1") + (outcome_means[2] + u_2) * (Z == "2") + 
    (outcome_means[3] + u_3) * (Z == "3") + (outcome_means[4] + 
    u_4) * (Z == "4") + + (outcome_means[5] + 
    u_5) * (Z == "5") + u ,  conditions = c("1", "2", "3", "4", "5"), 
    assignment_variables = Z)
# Estimands
estimand <- declare_estimands(ate_Y_2_1 = mean(Y_Z_2 - Y_Z_1), ate_Y_3_1 = mean(Y_Z_3 - 
    Y_Z_1), ate_Y_4_1 = mean(Y_Z_4 - Y_Z_1), ate_Y_5_1 = mean(Y_Z_5 - Y_Z_1),
    ate_Y_3_2 = mean(Y_Z_3 - Y_Z_2), ate_Y_4_2 = mean(Y_Z_4 - Y_Z_2), 
    ate_Y_5_2 = mean(Y_Z_5 - Y_Z_2), ate_Y_4_3 = mean(Y_Z_4 - 
    Y_Z_3), ate_Y_5_3 = mean(Y_Z_5 - Y_Z_3), ate_Y_5_4 = mean(Y_Z_5 - Y_Z_4))
# Assignment
assignment <- declare_assignment(num_arms = 5, conditions = c("1", "2", "3", 
"4","5"), assignment_variable = Z)
reveal_Y <- declare_reveal(assignment_variables = Z)
# Estimator
estimator <- declare_estimator(handler = function(data) {
    estimates <- rbind.data.frame(
      ate_Y_2_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "2"), 
      ate_Y_3_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "3"), 
      ate_Y_4_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "4"), 
      ate_Y_5_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "5"), 
      ate_Y_3_2 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "2", condition2 = "3"), 
      ate_Y_4_2 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "2", condition2 = "4"), 
      ate_Y_5_2 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "2", condition2 = "5"),
      ate_Y_4_3 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "3", condition2 = "4"),
      ate_Y_5_3 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "3", condition2 = "5"),
      ate_Y_5_4 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "4", condition2 = "5")
      )
    names(estimates)[names(estimates) == "N"] <- "N_DIM"
    estimates$estimator_label <- c("DIM (Z_2 - Z_1)", "DIM (Z_3 - Z_1)", 
    "DIM (Z_4 - Z_1)", "DIM (Z_5 - Z_1)","DIM (Z_3 - Z_2)", "DIM (Z_4 - Z_2)", "DIM (Z_5 - Z_2)",
    "DIM (Z_4 - Z_3)", "DIM (Z_5 - Z_3)", "DIM (Z_5 - Z_4)")
    estimates$estimand_label <- rownames(estimates)
    estimates$estimate <- estimates$coefficients
    estimates$term <- NULL
    return(estimates)
})
multi_arm_design <- population + potential_outcomes + assignment + 
    reveal_Y + estimand + estimator

# Diagnose Experiment 1 ad click rate:
t<-Sys.time()
diagnosis <- diagnose_design(multi_arm_design)
Sys.time()-t
saveRDS(diagnosis,file="post-diagnosis-1.rds")

dat1<-diagnosis$diagnosands_df[,c("estimand_label","estimator_label","bias","rmse","power","coverage","mean_estimate","sd_estimate","mean_se","type_s_rate","mean_estimand","n_sims")]
dat2<-diagnosis$diagnosands_df[,c("estimand_label","estimator_label","se(bias)","se(rmse)","se(power)","se(coverage)","se(mean_estimate)","se(sd_estimate)","se(mean_se)","se(type_s_rate)","se(mean_estimand)","n_sims")]
dat2$estimand_label<-NA
dat2$estimator_label<-NA
tmp_n<-nrow(dat1)+nrow(dat2)
dat<-data.frame(Estimand=rep(NA,tmp_n),Estimator=rep(NA,tmp_n)
                ,Bias=rep(NA,tmp_n),RMSE=rep(NA,tmp_n)
                ,Power=rep(NA,tmp_n),Coverage=rep(NA,tmp_n)
                ,Mean_Estimate=rep(NA,tmp_n),SD_Estimate=rep(NA,tmp_n)
                ,Mean_SE=rep(NA,tmp_n) ,Type_S_Rate=rep(NA,tmp_n)
                ,Mean_Estimand=rep(NA,tmp_n),N_Sims=rep(NA,tmp_n))

j1<-j2<-1
for(i in 1:tmp_n){
  if(i%%2==0){
  dat[i,]<-dat2[j2,]
  j2<-j2+1
  }else{
  dat[i,]<-dat1[j1,]
  j1<-j1+1
  }
}
print(xtable(dat[,1:(ncol(dat)-1)],digits=2), include.rownames=FALSE)
```


The only estimand for which our experiment is lower on power for (80%) is the difference between T2 = covid - PA and T3 = covid - Lancaster. 
## Experiment 2: 5 arms

* T1 = covid - Refugee
* T2 = no covid - Refugee
* T3 = covid - Neither
* T4 = no covid - Neither
* T5 = covid - Immigrant

We want to learn whether there is differential support for refugee ads on Facebook. Respondents
are randomly assigned to receive ads with refugees with information on the above five arms. Assignment to each of the five arms is with equal probabilities, and other then mention of covid and type of individual, ads otherwise identical. We define our outcome of interest as the difference in click rates between experimental conditions.

We'll focus on pairwise comparisons across treatments (a conservative approach given our main hypotheses will be answered with comparisons of T1-T2, T2-T4, T3-T4, T1-T5, T3-T5).

#### Design Declaration A

* Model: 

We specify a population of size $N$ where a unit $i$ has a potential outcome, $Y_i(Z=0)$, when it remains untreated and $M\, (m=1,2,\cdots,M)$ potential outcomes defined according to the treatment that it receives. The effect of each treatment on the outcome of unit $i$ is equal to the difference in the potential outcome under treatment condition m and the control condition: $Y_i(Z=m)-Y_i(Z=0)$.

* Inquiry:

We are interested in all of the pairwise comparisons between arms: $E[Y(m)-Y(m')]$, for all $m\in\{1,\cdots,M\}$.

* Data strategy:

We randomly assign $k/N$ units to each of the treatment arms.

* Answer strategy:

Take every pairwise difference in means corresponding to the specific estimand.

```{r , eval=FALSE}
set.seed(123)
N <- 450000 #450K
covid_effect<-0.1 #assume same covid effect as Experiment 1
refugee_effect<-0.075 #assume refugee effect is positive and larger than immigrant
immigrant_effect<-0.025 #assume immigrant effect is positive and smaller than refugee effect
outcome_means <- c(covid_effect+refugee_effect #covid - Refugee
                   ,refugee_effect #no covid - Refugee
                   ,covid_effect#covid - Neither
                   ,0 #no covid - Neither; assume no effect of ad
                   ,covid_effect+immigrant_effect#covid - Immigrant
                   )# also assumes that there are no interaction effects
sd_i <- 0.2
outcome_sds <- c(0, 0, 0, 0, 0)

# Population
population <- declare_population(N = N, u_1 = rnorm(N, 0, outcome_sds[1L]), 
    u_2 = rnorm(N, 0, outcome_sds[2L]), u_3 = rnorm(N, 0, outcome_sds[3L]), 
    u_4 = rnorm(N, 0, outcome_sds[4L]), u_5 = rnorm(N, 0, outcome_sds[5L]),
    u = rnorm(N) * sd_i)
# Potential outcomes
potential_outcomes <- declare_potential_outcomes(formula = Y ~ (outcome_means[1] + 
    u_1) * (Z == "1") + (outcome_means[2] + u_2) * (Z == "2") + 
    (outcome_means[3] + u_3) * (Z == "3") + (outcome_means[4] + 
    u_4) * (Z == "4") + + (outcome_means[5] + 
    u_5) * (Z == "5") + u ,  conditions = c("1", "2", "3", "4", "5"), 
    assignment_variables = Z)
# Estimands
estimand <- declare_estimands(ate_Y_2_1 = mean(Y_Z_2 - Y_Z_1), ate_Y_3_1 = mean(Y_Z_3 - 
    Y_Z_1), ate_Y_4_1 = mean(Y_Z_4 - Y_Z_1), ate_Y_5_1 = mean(Y_Z_5 - Y_Z_1),
    ate_Y_3_2 = mean(Y_Z_3 - Y_Z_2), ate_Y_4_2 = mean(Y_Z_4 - Y_Z_2), 
    ate_Y_5_2 = mean(Y_Z_5 - Y_Z_2), ate_Y_4_3 = mean(Y_Z_4 - 
    Y_Z_3), ate_Y_5_3 = mean(Y_Z_5 - Y_Z_3), ate_Y_5_4 = mean(Y_Z_5 - Y_Z_4))
# Assignment
assignment <- declare_assignment(num_arms = 5, conditions = c("1", "2", "3", 
"4","5"), assignment_variable = Z)
reveal_Y <- declare_reveal(assignment_variables = Z)
# Estimator
estimator <- declare_estimator(handler = function(data) {
    estimates <- rbind.data.frame(
      ate_Y_2_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "2"), 
      ate_Y_3_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "3"), 
      ate_Y_4_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "4"), 
      ate_Y_5_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "5"), 
      ate_Y_3_2 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "2", condition2 = "3"), 
      ate_Y_4_2 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "2", condition2 = "4"), 
      ate_Y_5_2 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "2", condition2 = "5"),
      ate_Y_4_3 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "3", condition2 = "4"),
      ate_Y_5_3 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "3", condition2 = "5"),
      ate_Y_5_4 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "4", condition2 = "5")
      )
    names(estimates)[names(estimates) == "N"] <- "N_DIM"
    estimates$estimator_label <- c("DIM (Z_2 - Z_1)", "DIM (Z_3 - Z_1)", 
    "DIM (Z_4 - Z_1)", "DIM (Z_5 - Z_1)","DIM (Z_3 - Z_2)", "DIM (Z_4 - Z_2)", "DIM (Z_5 - Z_2)",
    "DIM (Z_4 - Z_3)", "DIM (Z_5 - Z_3)", "DIM (Z_5 - Z_4)")
    estimates$estimand_label <- rownames(estimates)
    estimates$estimate <- estimates$coefficients
    estimates$term <- NULL
    return(estimates)
})
multi_arm_design2 <- population + potential_outcomes + assignment + 
    reveal_Y + estimand + estimator
# Diagnose Experiment 1 ad click rate:
t<-Sys.time()
diagnosis <- diagnose_design(multi_arm_design2)
Sys.time()-t
saveRDS(diagnosis,file="diagnosis-2.rds")



library(xtable)
dat1<-diagnosis$diagnosands_df[,c("estimand_label","estimator_label","bias","rmse","power","coverage","mean_estimate","sd_estimate","mean_se","type_s_rate","mean_estimand","n_sims")]
dat2<-diagnosis$diagnosands_df[,c("estimand_label","estimator_label","se(bias)","se(rmse)","se(power)","se(coverage)","se(mean_estimate)","se(sd_estimate)","se(mean_se)","se(type_s_rate)","se(mean_estimand)","n_sims")]
dat2$estimand_label<-NA
dat2$estimator_label<-NA
tmp_n<-nrow(dat1)+nrow(dat2)
dat<-data.frame(Estimand=rep(NA,tmp_n),Estimator=rep(NA,tmp_n)
                ,Bias=rep(NA,tmp_n),RMSE=rep(NA,tmp_n)
                ,Power=rep(NA,tmp_n),Coverage=rep(NA,tmp_n)
                ,Mean_Estimate=rep(NA,tmp_n),SD_Estimate=rep(NA,tmp_n)
                ,Mean_SE=rep(NA,tmp_n) ,Type_S_Rate=rep(NA,tmp_n)
                ,Mean_Estimand=rep(NA,tmp_n),N_Sims=rep(NA,tmp_n))

j1<-j2<-1
for(i in 1:tmp_n){
  if(i%%2==0){
  dat[i,]<-dat2[j2,]
  j2<-j2+1
  }else{
  dat[i,]<-dat1[j1,]
  j1<-j1+1
  }
}
print(xtable(dat[,1:(ncol(dat)-1)],digits=2), include.rownames=FALSE)
```

#### Design Declaration B

* Model: 

We specify a model with a population $N$ that has three variables affected by treatment: response variable $R_i$, outcome (here refugee thermometer rating in the survey) $Y_i$, which is correlated with response variable through parameter $\rho$. $Y_i^{obs}$ is the measured version of $Y_i$, which is only observed when $R_i=1$. For our setting, when a respondent is willing to click on the ad and answer the survey $R_i=1$.

* Inquiry:

Here we're interested in knowing the average of all respondents' differences in treatment arm potential outcomes, all of the pairwise comparisons between arms: $E[Y(m)-Y(m')]$, for all $m\in\{1,\cdots,M\}$. But we're also interested in the average treatment effect on reporting $E[R_i(m)-R_i(m')]$ as well as the pairwise comparison between treatment arms among those who report: $E[Y_i(m)-Y_i(m')|R_i=1]$.

* Data strategy:

We randomly assign $N/k = 90,000$ units to each of the treatment arms.

* Answer strategy:

$R_i$ and $Y_i^{obs}$, take every pairwise difference in means corresponding to the specific estimand.

Experiment 2:

* T1 = covid - Refugee
* T2 = no covid - Refugee
* T3 = covid - Neither
* T4 = no covid - Neither
* T5 = covid - Immigrant

```{r, eval=FALSE}
#Starting parameters
N <- 450000
a_R <- 0
#Likelihood of responding to survey after exposed to treatment arm: let covid effect on going to survey be 0.3
b1_R <- 0.1+0.075 + 0.2 #covid - refugee
b2_R <- 0.075 + 0.2  #no covid - refugee
b3_R <- 0.1 + 0.2  #covid - neither
b4_R <- 0.02 + 0.2 #no covid - neither
b5_R <- 0.1+0.025 + 0.2  #covid - immigrant
a_Y <- 0
#Effect on thermometer rating after exposed to treatment arm:
b1_Y <- 0.1+0.075   #covid - refugee
b2_Y <- 0.075   #no covid - refugee
b3_Y <- 0.1   #covid - neither
b4_Y <- 0.02  #no covid - neither
b5_Y <- 0.1+0.025   #covid - immigrant
#correl
rho <- c(0.0,0.2,0.8)

#set up
t<-Sys.time()
for(i in 1:3){
  cat("Start Design:",i,"\n")
#Population
population <- declare_population(N = N, u = rnorm(N), v=rnorm(N)
              ,u1_R = rnorm(N),u2_R = rnorm(N),u3_R = rnorm(N),u4_R = rnorm(N),u5_R = rnorm(N)
              ,u1_Y = rnorm(N,mean = rho[i] * u1_R, sd = sqrt(1 - rho[i]^2)),u2_Y = rnorm(N,mean = rho[i] * u2_R, sd = sqrt(1 - rho[i]^2))
              ,u3_Y = rnorm(N,mean = rho[i] * u3_R, sd = sqrt(1 - rho[i]^2)),u4_Y = rnorm(N,mean = rho[i] * u4_R, sd = sqrt(1 - rho[i]^2))
              ,u5_Y = rnorm(N,mean = rho[i] * u5_R, sd = sqrt(1 - rho[i]^2))
              ) #one error eqn Y; one error eqn R;  errors for each condition in R; errors for each condition in Y
#Potential outcomes 
  #R
potential_outcomes_R <- declare_potential_outcomes(
  R ~ (a_R + b1_R + u1_R)* (Z == "1") + (a_R + b2_R + u2_R)* (Z == "2")  
  + (a_R + b3_R + u3_R)* (Z == "3") + (a_R + b4_R + u4_R)* (Z == "4")
  + (a_R + b5_R + u5_R)* (Z == "5") > v, conditions = c("1", "2", "3", "4", "5"), assignment_variables = Z)
  #Y
potential_outcomes_Y <- declare_potential_outcomes(
  Y ~ (a_Y + b1_Y + u1_Y)* (Z == "1") + (a_Y + b2_Y + u2_Y)* (Z == "2")
  + (a_Y + b3_Y + u3_Y)* (Z == "3") + (a_Y + b4_Y + u4_Y)* (Z == "4")
  + (a_Y + b5_Y + u5_Y)* (Z == "5") + u, conditions = c("1", "2", "3", "4", "5"), assignment_variables = Z) #>u
#Estimands: 3 types -- ATE on R, ATE on Y, ATE on Y|R
estimand <- declare_estimands(
  #ATE on R
  ate_R_2_1 = mean(R_Z_2 - R_Z_1), ate_R_3_1 = mean(R_Z_3 - R_Z_1), ate_R_4_1 = mean(R_Z_4 - R_Z_1), ate_R_5_1 = mean(R_Z_5 - R_Z_1),
  ate_R_3_2 = mean(R_Z_3 - R_Z_2), ate_R_4_2 = mean(R_Z_4 - R_Z_2), ate_R_5_2 = mean(R_Z_5 - R_Z_2),
  ate_R_4_3 = mean(R_Z_4 - R_Z_3), ate_R_5_3 = mean(R_Z_5 - R_Z_3), ate_R_5_4 = mean(R_Z_5 - R_Z_4)
  #ATE on Y
  ,ate_Y_2_1 = mean(Y_Z_2 - Y_Z_1), ate_Y_3_1 = mean(Y_Z_3 - Y_Z_1), ate_Y_4_1 = mean(Y_Z_4 - Y_Z_1), ate_Y_5_1 = mean(Y_Z_5 - Y_Z_1),
  ate_Y_3_2 = mean(Y_Z_3 - Y_Z_2), ate_Y_4_2 = mean(Y_Z_4 - Y_Z_2), ate_Y_5_2 = mean(Y_Z_5 - Y_Z_2),
  ate_Y_4_3 = mean(Y_Z_4 - Y_Z_3), ate_Y_5_3 = mean(Y_Z_5 - Y_Z_3), ate_Y_5_4 = mean(Y_Z_5 - Y_Z_4)
  #ATE on Y|R
  ,ate_YR_2_1 = mean((Y_Z_2 - Y_Z_1)[R == 1]), ate_YR_3_1 = mean((Y_Z_3 - Y_Z_1)[R == 1])
  ,ate_YR_4_1 = mean((Y_Z_4 - Y_Z_1)[R == 1]), ate_YR_5_1 = mean((Y_Z_5 - Y_Z_1)[R == 1])
  ,ate_YR_3_2 = mean((Y_Z_3 - Y_Z_2)[R == 1]), ate_YR_4_2 = mean((Y_Z_4 - Y_Z_2)[R == 1])
  ,ate_YR_5_2 = mean((Y_Z_5 - Y_Z_2)[R == 1]), ate_YR_4_3 = mean((Y_Z_4 - Y_Z_3)[R == 1])
  ,ate_YR_5_3 = mean((Y_Z_5 - Y_Z_3)[R == 1]), ate_YR_5_4 = mean((Y_Z_5 - Y_Z_4)[R == 1])
  )
#Assignment
assignment <- declare_assignment(num_arms = 5, conditions = c("1", "2", "3", "4", "5"), assignment_variable = Z)
#Reveal/Observed: ??
reveal <- declare_reveal(outcome_variables = c("R", "Y"), assignment_variables = Z)
observed <- declare_step(Y_obs = ifelse(R, Y, NA), handler = fabricate)

#Estimator
estimator <- declare_estimator(handler = function(data) {
    estimates <- rbind.data.frame(
      #ATE on R
      ate_R_2_1 = difference_in_means(formula = R ~ Z, data = data, condition1 = "1", condition2 = "2"), 
      ate_R_3_1 = difference_in_means(formula = R ~ Z, data = data, condition1 = "1", condition2 = "3"), 
      ate_R_4_1 = difference_in_means(formula = R ~ Z, data = data, condition1 = "1", condition2 = "4"), 
      ate_R_5_1 = difference_in_means(formula = R ~ Z, data = data, condition1 = "1", condition2 = "5"), 
      ate_R_3_2 = difference_in_means(formula = R ~ Z, data = data, condition1 = "2", condition2 = "3"), 
      ate_R_4_2 = difference_in_means(formula = R ~ Z, data = data, condition1 = "2", condition2 = "4"), 
      ate_R_5_2 = difference_in_means(formula = R ~ Z, data = data, condition1 = "2", condition2 = "5"),
      ate_R_4_3 = difference_in_means(formula = R ~ Z, data = data, condition1 = "3", condition2 = "4"),
      ate_R_5_3 = difference_in_means(formula = R ~ Z, data = data, condition1 = "3", condition2 = "5"),
      ate_R_5_4 = difference_in_means(formula = R ~ Z, data = data, condition1 = "4", condition2 = "5"),
      # ATE on Y conditional on R
      ate_YR_2_1 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "1", condition2 = "2"), 
      ate_YR_3_1 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "1", condition2 = "3"), 
      ate_YR_4_1 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "1", condition2 = "4"), 
      ate_YR_5_1 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "1", condition2 = "5"), 
      ate_YR_3_2 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "2", condition2 = "3"), 
      ate_YR_4_2 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "2", condition2 = "4"), 
      ate_YR_5_2 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "2", condition2 = "5"),
      ate_YR_4_3 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "3", condition2 = "4"),
      ate_YR_5_3 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "3", condition2 = "5"),
      ate_YR_5_4 = difference_in_means(formula = Y_obs ~ Z, data = data, condition1 = "4", condition2 = "5"),
      #ATE on Y
      ate_Y_2_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "2"), 
      ate_Y_3_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "3"), 
      ate_Y_4_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "4"), 
      ate_Y_5_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "5"), 
      ate_Y_3_2 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "2", condition2 = "3"), 
      ate_Y_4_2 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "2", condition2 = "4"), 
      ate_Y_5_2 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "2", condition2 = "5"),
      ate_Y_4_3 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "3", condition2 = "4"),
      ate_Y_5_3 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "3", condition2 = "5"),
      ate_Y_5_4 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "4", condition2 = "5")
      )
    names(estimates)[names(estimates) == "N"] <- "N_DIM"
    estimates$estimator_label <- c(
      #R
      "DIM_R (Z_2 - Z_1)", "DIM_R (Z_3 - Z_1)", "DIM_R (Z_4 - Z_1)", "DIM_R (Z_5 - Z_1)","DIM_R (Z_3 - Z_2)"
      , "DIM_R (Z_4 - Z_2)", "DIM_R (Z_5 - Z_2)", "DIM_R (Z_4 - Z_3)", "DIM_R (Z_5 - Z_3)", "DIM_R (Z_5 - Z_4)"
      #Y|R
      , "DIM_Y_obs (Z_2 - Z_1)", "DIM_Y_obs (Z_3 - Z_1)", "DIM_Y_obs (Z_4 - Z_1)", "DIM_Y_obs (Z_5 - Z_1)","DIM_Y_obs (Z_3 - Z_2)"
      , "DIM_Y_obs (Z_4 - Z_2)", "DIM_Y_obs (Z_5 - Z_2)", "DIM_Y_obs (Z_4 - Z_3)", "DIM_Y_obs (Z_5 - Z_3)", "DIM_Y_obs (Z_5 - Z_4)"
      #Y
      ,"DIM_Y (Z_2 - Z_1)", "DIM_Y (Z_3 - Z_1)", "DIM_Y (Z_4 - Z_1)", "DIM_Y (Z_5 - Z_1)","DIM_Y (Z_3 - Z_2)"
      , "DIM_Y (Z_4 - Z_2)", "DIM_Y (Z_5 - Z_2)", "DIM_Y (Z_4 - Z_3)", "DIM_Y (Z_5 - Z_3)", "DIM_Y (Z_5 - Z_4)"
      )
    estimates$estimand_label <- rownames(estimates)
    estimates$estimate <- estimates$coefficients
    estimates$term <- NULL
    return(estimates)
})
multi_arm_attrition_design <- population + potential_outcomes_R + 
    potential_outcomes_Y + assignment + reveal + observed + 
    estimand + estimator
diagnoses <- diagnose_design(multi_arm_attrition_design)
saveRDS(diagnoses,paste("multi_arm_attrition_design2-rho",i,".rds",sep=""))
cat("Finished Design:",i," in ", Sys.time()-t,"\n")
}
Sys.time()-t


# Combine and print xtable
rho1<-readRDS("multi_arm_attrition_design2-rho1.rds")
rho2<-readRDS("multi_arm_attrition_design2-rho2.rds")
rho3<-readRDS("multi_arm_attrition_design2-rho3.rds")
dat1<-rho1$diagnosands_df
dat1$design_label<-"rho=0.0"
dat2<-rho2$diagnosands_df
dat2$design_label<-"rho=0.2"
dat3<-rho3$diagnosands_df
dat3$design_label<-"rho=0.8"

dat<-rbind(dat1,dat2,dat3)

dat1<-dat[,c("design_label","estimand_label","estimator_label","bias","rmse","power","coverage","mean_estimate","sd_estimate","mean_se","type_s_rate","mean_estimand","n_sims")]
dat2<-dat[,c("design_label","estimand_label","estimator_label","se(bias)","se(rmse)","se(power)","se(coverage)","se(mean_estimate)","se(sd_estimate)","se(mean_se)","se(type_s_rate)","se(mean_estimand)","n_sims")]


dat2$estimand_label<-NA
dat2$estimator_label<-NA

tmp_n<-nrow(dat1)+nrow(dat2)
d<-data.frame(Design=rep(NA,tmp_n),Estimand=rep(NA,tmp_n),Estimator=rep(NA,tmp_n)
                ,Bias=rep(NA,tmp_n),RMSE=rep(NA,tmp_n)
                ,Power=rep(NA,tmp_n),Coverage=rep(NA,tmp_n)
                ,Mean_Estimate=rep(NA,tmp_n),SD_Estimate=rep(NA,tmp_n)
                ,Mean_SE=rep(NA,tmp_n) ,Type_S_Rate=rep(NA,tmp_n)
                ,Mean_Estimand=rep(NA,tmp_n),N_Sims=rep(NA,tmp_n))

j1<-j2<-1
for(i in 1:tmp_n){
  if(i%%2==0){
  d[i,]<-dat2[j2,]
  j2<-j2+1
  }else{
  d[i,]<-dat1[j1,]
  j1<-j1+1
  }
}
print(xtable(d[1:60,2:(ncol(d)-1)],digits=2), include.rownames=FALSE) #rho=0.0

print(xtable(d[61:120,2:(ncol(d)-1)],digits=2), include.rownames=FALSE) #rho=0.2

print(xtable(d[121:nrow(d),2:(ncol(d)-1)],digits=2), include.rownames=FALSE) #rho=0.8

```


#### Post-experiment checks

Given that we had to estimate the likely number of profiles on FB who would be exposed to our treatment arms, we conduct a verification of our power calculations using the actual N sizes FB ended up reaching (roughly 45K per arm):

```{r , eval=FALSE}
set.seed(123)
N <- 225000 #45K per arm * 5
covid_effect<-0.1 #assume same covid effect as Experiment 1
refugee_effect<-0.075 #assume refugee effect is positive and larger than immigrant
immigrant_effect<-0.025 #assume immigrant effect is positive and smaller than refugee effect
outcome_means <- c(covid_effect+refugee_effect #covid - Refugee
                   ,refugee_effect #no covid - Refugee
                   ,covid_effect#covid - Neither
                   ,0 #no covid - Neither; assume no effect of ad
                   ,covid_effect+immigrant_effect#covid - Immigrant
                   )# also assumes that there are no interaction effects
sd_i <- 0.2
outcome_sds <- c(0, 0, 0, 0, 0)

# Population
population <- declare_population(N = N, u_1 = rnorm(N, 0, outcome_sds[1L]), 
    u_2 = rnorm(N, 0, outcome_sds[2L]), u_3 = rnorm(N, 0, outcome_sds[3L]), 
    u_4 = rnorm(N, 0, outcome_sds[4L]), u_5 = rnorm(N, 0, outcome_sds[5L]),
    u = rnorm(N) * sd_i)
# Potential outcomes
potential_outcomes <- declare_potential_outcomes(formula = Y ~ (outcome_means[1] + 
    u_1) * (Z == "1") + (outcome_means[2] + u_2) * (Z == "2") + 
    (outcome_means[3] + u_3) * (Z == "3") + (outcome_means[4] + 
    u_4) * (Z == "4") + + (outcome_means[5] + 
    u_5) * (Z == "5") + u ,  conditions = c("1", "2", "3", "4", "5"), 
    assignment_variables = Z)
# Estimands
estimand <- declare_estimands(ate_Y_2_1 = mean(Y_Z_2 - Y_Z_1), ate_Y_3_1 = mean(Y_Z_3 - 
    Y_Z_1), ate_Y_4_1 = mean(Y_Z_4 - Y_Z_1), ate_Y_5_1 = mean(Y_Z_5 - Y_Z_1),
    ate_Y_3_2 = mean(Y_Z_3 - Y_Z_2), ate_Y_4_2 = mean(Y_Z_4 - Y_Z_2), 
    ate_Y_5_2 = mean(Y_Z_5 - Y_Z_2), ate_Y_4_3 = mean(Y_Z_4 - 
    Y_Z_3), ate_Y_5_3 = mean(Y_Z_5 - Y_Z_3), ate_Y_5_4 = mean(Y_Z_5 - Y_Z_4))
# Assignment
assignment <- declare_assignment(num_arms = 5, conditions = c("1", "2", "3", 
"4","5"), assignment_variable = Z)
reveal_Y <- declare_reveal(assignment_variables = Z)
# Estimator
estimator <- declare_estimator(handler = function(data) {
    estimates <- rbind.data.frame(
      ate_Y_2_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "2"), 
      ate_Y_3_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "3"), 
      ate_Y_4_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "4"), 
      ate_Y_5_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "1", condition2 = "5"), 
      ate_Y_3_2 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "2", condition2 = "3"), 
      ate_Y_4_2 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "2", condition2 = "4"), 
      ate_Y_5_2 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "2", condition2 = "5"),
      ate_Y_4_3 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "3", condition2 = "4"),
      ate_Y_5_3 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "3", condition2 = "5"),
      ate_Y_5_4 = difference_in_means(formula = Y ~ Z, data = data, condition1 = "4", condition2 = "5")
      )
    names(estimates)[names(estimates) == "N"] <- "N_DIM"
    estimates$estimator_label <- c("DIM (Z_2 - Z_1)", "DIM (Z_3 - Z_1)", 
    "DIM (Z_4 - Z_1)", "DIM (Z_5 - Z_1)","DIM (Z_3 - Z_2)", "DIM (Z_4 - Z_2)", "DIM (Z_5 - Z_2)",
    "DIM (Z_4 - Z_3)", "DIM (Z_5 - Z_3)", "DIM (Z_5 - Z_4)")
    estimates$estimand_label <- rownames(estimates)
    estimates$estimate <- estimates$coefficients
    estimates$term <- NULL
    return(estimates)
})
multi_arm_design2 <- population + potential_outcomes + assignment + 
    reveal_Y + estimand + estimator
# Diagnose Experiment 1 ad click rate:
t<-Sys.time()
diagnosis <- diagnose_design(multi_arm_design2)
Sys.time()-t
saveRDS(diagnosis,file="post-diagnosis-2.rds")



library(xtable)
dat1<-diagnosis$diagnosands_df[,c("estimand_label","estimator_label","bias","rmse","power","coverage","mean_estimate","sd_estimate","mean_se","type_s_rate","mean_estimand","n_sims")]
dat2<-diagnosis$diagnosands_df[,c("estimand_label","estimator_label","se(bias)","se(rmse)","se(power)","se(coverage)","se(mean_estimate)","se(sd_estimate)","se(mean_se)","se(type_s_rate)","se(mean_estimand)","n_sims")]
dat2$estimand_label<-NA
dat2$estimator_label<-NA
tmp_n<-nrow(dat1)+nrow(dat2)
dat<-data.frame(Estimand=rep(NA,tmp_n),Estimator=rep(NA,tmp_n)
                ,Bias=rep(NA,tmp_n),RMSE=rep(NA,tmp_n)
                ,Power=rep(NA,tmp_n),Coverage=rep(NA,tmp_n)
                ,Mean_Estimate=rep(NA,tmp_n),SD_Estimate=rep(NA,tmp_n)
                ,Mean_SE=rep(NA,tmp_n) ,Type_S_Rate=rep(NA,tmp_n)
                ,Mean_Estimand=rep(NA,tmp_n),N_Sims=rep(NA,tmp_n))

j1<-j2<-1
for(i in 1:tmp_n){
  if(i%%2==0){
  dat[i,]<-dat2[j2,]
  j2<-j2+1
  }else{
  dat[i,]<-dat1[j1,]
  j1<-j1+1
  }
}
print(xtable(dat[,1:(ncol(dat)-1)],digits=2), include.rownames=FALSE)
```